// Start from an empty dataset in memory.
clear


// Fix the random-number seed before any random draw is made further down.
// NOTE(review): presumably -randomselect- draws from Stata's RNG, so this makes the draws repeatable — confirm.
set seed 1073741823

// Requires the user-written -randomselect- command; uncomment to install it:
//ssc install randomselect


/* First session */
// Read the raw wave-1 export; the first CSV row supplies the variable names.
insheet using "${pathdata_robustness}/RobustnessExperiment5_raw_wave1.csv", names
// Discard the first two data rows.
// NOTE(review): presumably extra header/metadata rows of the survey export — confirm against the raw file.
drop in 1/2

// Exclude rows whose writedowninfo answer is "1" (compared as a string because
// this runs before the destring below).
drop if writedowninfo=="1"

// Convert every string variable that holds only numbers to numeric;
// variables with non-numeric content are left as strings.
destring *, replace 

// ONLY NEEDED FOR BONUS PAYMENTS
// Draw the 10% of participants who are selected for a bonus payment.
randomselect, gen(sample) prop(0.1)
tabulate sample

// Randomly draw which task the bonus (30$) would be paid for, if the
// participant is eligible: correct recall, or the belief stated in part 1 or
// part 2. One third is assigned to recall; the remainder is split evenly
// between the two belief parts.
randomselect, gen(bonus_recall) prop(0.333)
randomselect if bonus_recall == 0, gen(bonus_belief) prop(0.5)

// Participants outside the bonus sample get no recall-bonus assignment.
replace bonus_recall = . if sample == 0

// Recode the belief draw among bonus-sample participants not assigned to
// recall: a draw of 1 becomes 2, a draw of 0 becomes 1.
// NOTE(review): presumably 1 = belief from part 1, 2 = belief from part 2 — confirm.
replace bonus_belief = cond(bonus_belief == 1, 2, 1) ///
    if bonus_belief == 1 | (bonus_belief == 0 & bonus_recall == 0)
replace bonus_belief = . if sample == 0


// Randomly draw the product for which the bonus payment on beliefs (30$) is
// carried out: one third bicycle, then half of the remainder restaurant,
// everyone else in the bonus sample videogame.
randomselect if sample == 1, gen(bicycle) prop(0.333)
randomselect if sample == 1 & bicycle == 0, gen(restaurant) prop(0.5)

// Product label for the bonus sample; empty string for everyone else.
// A restaurant draw takes precedence over a bicycle draw, then videogame.
gen bonusproduct = cond(sample != 1, "",                              ///
                   cond(restaurant == 1, "restaurant",                ///
                   cond(bicycle == 1, "bicycle",                      ///
                   cond(bicycle == 0 & restaurant == 0, "videogame", ""))))



// Calculate time spent on prompt page
// (Plain // comments are used here on purpose: /// is Stata's line-join
// marker and would glue the following line onto the comment line.)

// Indicators for which prompt text field the participant filled in.
// NOTE(review): the mapping of v91/v85/v58/v52 to products and survey
// versions is taken from the variable names on the left — confirm against
// the raw CSV header.
gen bicycle1_prompt = (v91 != "")

gen restaurant1_prompt = (v85 != "")

gen videogame1_prompt = (_storyprompt != "")

gen bicycle2_prompt = (v58 != "")

gen restaurant2_prompt = (v52 != "")

gen videogame2_prompt = (_prompt != "")

// Page time = page-submit minus first-click timer of the matching prompt
// page; stays 0 for participants with no prompt text at all. If more than
// one indicator is set, the last matching replace below wins.
// NOTE(review): v56/v54, v62/v60, v89/v87 and v95/v93 are presumably the
// page-submit / first-click timers of the respective pages — confirm.
gen pagetime_prompt = 0

replace pagetime_prompt = _q775_pagesubmit - _q775_firstclick if videogame2_prompt == 1

replace pagetime_prompt = v56 - v54 if restaurant2_prompt == 1

replace pagetime_prompt = v62 - v60 if bicycle2_prompt == 1

replace pagetime_prompt = _q876_pagesubmit - _q876_firstclick if videogame1_prompt == 1

replace pagetime_prompt = v89 - v87 if restaurant1_prompt == 1

replace pagetime_prompt = v95 - v93 if bicycle1_prompt == 1


// Concatenate all prompt text fields into _prompt
replace _prompt = _prompt + v85 + v58 + v52 + _storyprompt + v91




/* Variables to keep */
keep prolific_pid treatment sample                                     ///
     condition_* p_guess* task* truepos_fraction*                      ///
     tot_reviews* story_type* positive_stat negative_stat              ///
     bonus_belief bonus_recall bonusproduct                            ///
     qid9 qid124 qid12 qid18                                           ///
     durationinseconds finished writedowninfo q202                     ///
     comprehension1 comprehension2 comprehension3 comprehension5       ///
     _prompt pagetime_prompt

* Number of words in the concatenated prompt text
* (nwords() is a user-written egen function, not part of official Stata)
egen length_prompt = nwords(_prompt)


* Give the demographic questions readable names
rename qid9   gender
rename qid124 age
rename qid12  education
rename qid18  employment

* Indicator Male: 1 if gender == 1, 0 if gender is 2 or 3, missing otherwise
gen male = cond(gender == 1, 1, cond(inlist(gender, 2, 3), 0, .))

* Indicator Bachelor's degree or more: 1 for education 5-8, 0 for 1-4, missing otherwise
gen college = cond(inlist(education, 5, 6, 7, 8), 1, cond(inlist(education, 1, 2, 3, 4), 0, .))

* Indicator Employment status: 1 for employment 1-2, 0 for 3-4, missing otherwise
gen employed = cond(inlist(employment, 1, 2), 1, cond(inlist(employment, 3, 4), 0, .))

* Indicator wave 1
gen wave1 = 1

* Indicator finished wave 1 - EMPLOYMENT IS THE LAST MANDATORY QUESTION IN THE SURVEY
gen completed_wave1 = inlist(employment, 1, 2, 3, 4)

* Keep one row per participant (the first one encountered)
duplicates drop prolific_pid, force

tabulate treatment

* Wide to long: one row per participant x product
reshape long condition_ p_guess_ task_ truepos_fraction_, i(prolific_pid) j(product) string

* Store the long-format wave-1 data (merged with wave 2 further below)
save "${pathdata_robustness}/temp/RobustnessExperiment5_long.dta", replace


* Build a participant-level list of IDs and treatments: treatment is constant
* within participant after the reshape, so keeping the first row per ID is enough.
keep prolific_pid treatment
drop if prolific_pid == ""
duplicates drop prolific_pid, force

tabulate treatment

save "${pathdata_robustness}/temp/RobustnessExperiment5_prolific_pid_only.dta", replace

clear

/* Second (recall) session */
insheet using "${pathdata_robustness}/RobustnessExperiment5_raw_wave2.csv", names
drop in 1/2

destring *, replace

/* The exported valence_* and recall_* variables are unusable (the survey
   Javascript broke when the certainty question was removed, and the type
   question was registered as two separate questions), so they are discarded
   and rebuilt from the individual response variables below. */
drop valence_bicycle valence_restaurant valence_videogame ///
     recall_videogame recall_restaurant recall_bicycle

/* Valence recall: sum the answer across the questions it was registered in,
   per product.
   NOTE(review): rowtotal() returns 0, not missing, when every component is
   missing, so the rebuilt variables are never missing — confirm that this is
   intended for the later "== ." checks on recall_. */
egen valence_videogame  = rowtotal(_valence_story _valence_stat _valence_dontremember)
egen valence_bicycle    = rowtotal(v31 v32 v33)
egen valence_restaurant = rowtotal(v40 v41 v42)

/* Type-of-information recall: same approach, two questions per product */
egen recall_videogame  = rowtotal(_typeofinfo1 _typeofinfo2)
egen recall_bicycle    = rowtotal(v29 v30)
egen recall_restaurant = rowtotal(v38 v39)

/* Variables to keep */
keep prolific_pid screenedout durationinseconds ///
     beliefs_* recall_* cu_*                    ///
     valence_bicycle valence_restaurant valence_videogame

* Drop rows without an ID, then drop EVERY row of an ID that occurs more than once
keep if prolific_pid != ""
bysort prolific_pid: drop if _N > 1


* Wave indicators for the second session
gen wave1 = 0
gen wave2 = 1


/* Wide to long */
reshape long beliefs_ valence_ recall_ cu_, i(prolific_pid) j(product) string

save "${pathdata_robustness}/temp/RobustnessExperiment5_long_followup.dta", replace

/* Merged */
* Master = wave-2 long data in memory, using = wave-1 long data.
* NOTE(review): wave1 and durationinseconds exist in both files; without the
* update option the master (wave-2) values are kept for matched rows, so
* wave1 is 0 on every matched row — confirm this is intended.
merge 1:1 prolific_pid product using "${pathdata_robustness}/temp/RobustnessExperiment5_long.dta"


* ########## CREATE DATA ON ATTRITION IN BETWEEN SUBJECT TREATMENTS ##########

* Indicator whether subjects completed study 2 (row present in both waves)
gen completed = (_merge == 3)

* Prompt condition: 0 for treatments 1 and 3, 1 otherwise (treatments 2 and 4,
* and rows where treatment is missing)
gen prompt = !inlist(treatment, 1, 3)

* ##### Generate dataset with treatments and completion only
preserve
    * Drop rows that exist only in wave 2 (no wave-1 record, hence no treatment)
    drop if _merge == 1

    keep prolific_pid completed prompt

    duplicates drop

    save "${pathdata_summary}/AssociationAttrition.dta", replace
restore


* Keep only successful merges
keep if _merge == 3
drop _merge

* Convert any remaining numeric-looking string variables
destring *, replace


/* Exclusion Criteria */
* Drop rows flagged as screened out, and no-information products where the
* stated guess is anything other than 50 (a missing guess is also dropped).
drop if screenedout == "True" | (condition_ == "noinfo" & p_guess_ != 50)

save "${pathdata_robustness}/temp/RobustnessExperiment5_merged.dta", replace


/* Variables for analysis */

/* 1 - Effect variables */
* Distance of the stated guess from 50, signed so that guesses below 50 are positive
gen diff        = 50 - p_guess_
gen diff_recall = 50 - beliefs_

* Effect = movement away from 50 in the direction of the signal: the sign is
* flipped for positive-valence conditions, where guesses above 50 count as positive.
* NOTE(review): the original condition list named "storyplacebo_pos" twice;
* the duplicate is dropped here (no effect on results) — check whether a
* different fourth condition was intended.
gen effect = cond(inlist(condition_, "storyshort_pos", "statistic_pos", "storyplacebo_pos"), ///
                  p_guess_ - 50, diff)

gen effect_recall = cond(inlist(condition_, "storyshort_pos", "statistic_pos", "storyplacebo_pos"), ///
                         beliefs_ - 50, diff_recall)




/* Some changes to notation */
/* Changing the coding of valence for Treatment 4 (story placebo), since there
   was no negative-valence placebo story in this pilot. (The respective
   Qualtrics survey file has already been changed to include a placebo story
   with negative valence.) */

/* Treatment 4: mark the placebo-story conditions as prompted */
replace condition_ = condition_ + "_prompt" ///
    if treatment == 4 & inlist(condition_, "storyplacebo_pos", "storyplacebo_neg")

/* Treatment 2: mark the statistic conditions as prompted */
replace condition_ = condition_ + "_prompt" ///
    if treatment == 2 & inlist(condition_, "statistic_pos", "statistic_neg")


/* Valence recall */
* 1 if the recalled valence matches the valence of the condition shown
* (valence_ == 1 with a *_pos condition, valence_ == 2 with a *_neg condition), else 0.
gen valence_recall = 0
* if constraining valence recall to those who actually were asked to recall valence, i.e. who did not select noinfo as type of information remembered
//replace valence_recall = . if valence_ == .
replace valence_recall = 1 if valence_ == 1 & inlist(condition_, "statistic_pos", "statistic_pos_prompt", "storyplacebo_pos", "storyplacebo_pos_prompt", "storyshort_pos")
replace valence_recall = 1 if valence_ == 2 & inlist(condition_, "statistic_neg", "statistic_neg_prompt", "storyplacebo_neg", "storyplacebo_neg_prompt", "storyshort_neg")

/* Recall type of information */
* 1 if the recalled type matches the condition: recall_ == 1 for noinfo,
* 2 for statistic conditions, 3 for story conditions; missing if recall_ is missing.
gen type_recall = 0
replace type_recall = . if recall_ == .
replace type_recall =  1 if condition_=="noinfo" & recall_ ==1
replace type_recall =  1 if inlist(condition_, "statistic_pos", "statistic_neg", "statistic_pos_prompt", "statistic_neg_prompt") & recall_ == 2
replace type_recall =  1 if inlist(condition_, "storyplacebo_pos", "storyplacebo_neg", "storyshort_neg", "storyshort_pos", "storyplacebo_pos_prompt", "storyplacebo_neg_prompt") & recall_ ==3


/* Combined recall */
* Correct on both type and valence; for noinfo there is no valence to recall,
* so a correct type alone is enough.
gen combined_recall = 0
replace combined_recall = 1 if type_recall == 1 & valence_recall == 1
* Use the full variable name condition_ (not the abbreviation "condition") so
* the line does not depend on variable-abbreviation matching.
replace combined_recall = 1 if condition_ == "noinfo" & type_recall == 1

/* Draws for those who selected I don't know */
* A "don't know" answer (valence_ == 3 / recall_ == 4) is replaced by a random
* draw that counts as correct with probability 0.5 for valence and 0.333 for type.
gen correct_recall = combined_recall
randomselect if valence_ == 3, gen(dontknowvalence) prop(0.5)
randomselect if recall_ == 4, gen(dontknowtype) prop(0.333)
replace correct_recall = 1 if type_recall == 1 & dontknowvalence == 1
replace correct_recall = 1 if valence_recall == 1 & dontknowtype == 1
replace correct_recall = 1 if dontknowvalence == 1 & dontknowtype == 1

tab combined_recall
tab correct_recall


/* Drop missing products */
drop if p_guess_ == .

*tab recall_ treatment , m
*drop if recall_ == .

* Share of reviews matching the signal's valence, defined only for the
* statistic conditions of the respective valence.
gen fraction_pos = positive_stat/tot_reviews_pos if inlist(condition_, "statistic_pos", "statistic_pos_prompt")
gen fraction_neg = negative_stat/tot_reviews_neg if inlist(condition_, "statistic_neg", "statistic_neg_prompt")

* Single signal-strength variable: the positive share where defined, otherwise the negative share
gen fraction_signal = fraction_pos
replace fraction_signal = fraction_neg if fraction_signal==.

* Distance of the signal from an even split
gen extremity = abs(fraction_signal - 0.5)

/*Define objects of interest for graphs*/
* typestat: 1 = statistic without prompt (treatment 1), 2 = statistic with prompt (treatment 2)
gen typestat=.
replace typestat = 1 if inlist(condition_, "statistic_pos", "statistic_neg") & treatment == 1
replace typestat = 2 if inlist(condition_, "statistic_pos_prompt", "statistic_neg_prompt") & treatment == 2

* Use the full variable name recall_ (not the abbreviation "recall") so the
* command does not depend on variable-abbreviation matching.
tab recall_ typestat

tab condition_

* typestory: 1 = placebo story without prompt, 2 = placebo story with prompt
gen typestory=.
replace typestory = 1 if inlist(condition_, "storyplacebo_pos", "storyplacebo_neg")
replace typestory = 2 if inlist(condition_, "storyplacebo_pos_prompt", "storyplacebo_neg_prompt")

* Everything between preserve and restore works on a temporary copy; the
* dataset in memory is returned to its current state afterwards.
preserve

* Analysis sample excludes the no-information products
keep if condition_ != "noinfo"

/* Drop observations where updating is in the wrong direction */
//tab effect
keep if !(effect < 0 & inlist(condition_, "statistic_pos", "statistic_neg", "statistic_pos_prompt", "statistic_neg_prompt"))

* Keep only participants with at least two remaining product rows
bysort prolific_pid : keep if _N >= 2

save "${pathdata_robustness}/RobustnessExperiment5.dta", replace


* ##### Save data for summary statistics
* One row per distinct combination of participant ID and demographics

keep prolific_pid male age college employed

duplicates drop

save "${pathdata_summary}/AssociationSummary.dta", replace


* ##### Export the prompt transcripts
* Reload the analysis file, keep rows with a non-empty prompt and one row per
* distinct prompt text.
use "${pathdata_robustness}/RobustnessExperiment5.dta", clear
keep if _prompt != ""

duplicates drop _prompt, force

keep prolific_pid _prompt

save "${pathdata_robustness}/temp/RobustnessExperiment5_prompt_transcripts.dta", replace
export excel "${pathdata_robustness}/temp/RobustnessExperiment5_prompt_transcripts.xlsx", replace firstrow(variables)



restore


